## Table 13 in Online Appendix
## Paper: Ethnic Riots and Democratic Transition: A Lesson from Indonesia
## Last updated: 1 June 2015
## author: RJT
## Using data:"/Users/risatoha/Documents/Political Competition/Revisions/Conditional Accept/BJPS_data_clean_052115.dta")

##Get necessary packages. Code lines 7 to 26 may not be necessary if relevant packages are already loaded
ipak <- function(pkg){
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
  sapply(pkg, require, character.only = TRUE)
}
packages <- c("lme4", "mitools", "effects", "Amelia")  
ipak(packages)

install.packages("glmmADMB", type = "source", dep = TRUE,
                 repos="http://r-forge.r-project.org")
library(glmmADMB)

install.packages("coefplot2", repos = "http://www.math.mcmaster.ca/bolker/R", type = "source")
library(coefplot2)

# make sure nlme is not loaded
detach("package:nlme", unload = TRUE) # loaded via a namespace and will not unload
remove.packages("nlme")

## Start here
## loading Amelia
library(Amelia)
library(foreign)

## attaching the data
data<-read.dta("/Users/risatoha/Documents/Political Competition/Revisions/Conditional Accept/BJPS_data_clean_052115.dta")
dim(data)
data[1:10,]
summary(data)

## keep only necessary variables 
amdat<-subset(data, select=-c(b4el, compcut3, compgr2dist, conflictprov, delta_votmar, elf3sq, elfeth, elyear, golgr2dist, gr2relprop, outliers, rel2dist50, rel3dist50, resid, revelprox, unsfirprov, yhat, nongolkarvs, muslimpartyvs2)) 
dim(amdat)
summary(amdat)
amdat[1:10,]

##running Amelia
a.out<-amelia(amdat, m=10, idvars = c("province", "prov_code", "district", "dist_code", "year", "sepstrict", "turnopp", "urban", "postsoeharto","java"))
## saving results
save(a.out, file="imputations.RData")
## create outdata1.dta, … until outdata10.dta
write.amelia(obj=a.out, file.stem="outdata", format="dta")
## reading amelia output into R##
data1<-read.dta("/Users/risatoha/outdata1.dta")
data2<-read.dta("/Users/risatoha/outdata2.dta")
data3<-read.dta("/Users/risatoha/outdata3.dta")
data4<-read.dta("/Users/risatoha/outdata4.dta")
data5<-read.dta("/Users/risatoha/outdata5.dta")
data6<-read.dta("/Users/risatoha/outdata6.dta")
data7<-read.dta("/Users/risatoha/outdata7.dta")
data8<-read.dta("/Users/risatoha/outdata8.dta")
data9<-read.dta("/Users/risatoha/outdata9.dta")
data10<-read.dta("/Users/risatoha/outdata10.dta")

## check out descriptive statistics of mi datasets ##
summary(data1)
summary(data2)
summary(data3)
summary(data4)
summary(data5)
summary(data6)
summary(data7)
summary(data8)
summary(data9)
summary(data10)

## Here I switch to Steve Worthington's code
## is loading this data necessary? NOPE
load("~/Documents/Political Competition/Revisions/imputations.RData")
# create vector of observation-level random effects for each MI data frame
a.out$imputations <- lapply(a.out$imputations, function(x) {
  x$obs_level <- factor(1:nrow(a.out$imputations$imp1))
  return(x)
})
# change dist_code to a factor
a.out$imputations <- lapply(a.out$imputations, function(x) {
  x$dist_code <- factor(x$dist_code)
  return(x)
})
# subset each MI data frame
a.out$imputations <- lapply(a.out$imputations, function(x) {
  x[x$sepstrict == 0, ]
})

## Run negative binomial regression through 10 datasets and get average coefficients of each model.
# using a negative binomial model (with or without zero inflation)
# create empty list for models
admb_models <- vector(mode = "list", length = length(a.out$imputations))

# loop through the MI datasets fitting 10 models. This is for Column 1 Table 1 in paper
for (i in seq_along(a.out$imputations)){
  print(i)
  admb_models[[i]] <- glmmadmb(unkomptemp ~ revvotmar + afterel + rel5050gr21 + unkomptemp_1 + securityadj+
                                 lpdrbcap + lpop + larea + urban + java + postsoeharto + 
                                 (1|dist_code),
                               family = "nbinom",
                               zeroInflation = FALSE,
                               data = a.out$imputations[[i]])
}
save(admb_models, file = "admb_models.Rdata")
# -----------------
# get summaries and coefficients for each model manually. 
admb_models_summaries <- lapply(admb_models, summary)
admb_models_coefs <- lapply(admb_models_summaries, coef)
# get average coefficients for the 10 models
admb_combined <- Reduce("+", admb_models_coefs) / length(admb_models_coefs)
print(admb_combined)
write.csv(admb_combined, file="admb_combined.csv")
read.csv("admb_combined.csv")

# Estimate  Std. Error    z value     Pr(>|z|)
# (Intercept)  -1.818961e+01 2.53001e+00 -7.1912777 3.639634e-12
# revvotmar    -1.371652e-02 4.49009e-03 -3.0548678 2.368918e-03
# afterel       5.842508e-01 1.95227e-01  2.9923988 2.881747e-03
# rel5050gr21   1.031547e+00 5.65196e-01  1.8325681 1.228729e-01
# unkomptemp_1  7.905188e-02 2.60521e-02  3.0697138 3.105519e-03
# securityadj   6.630282e-05 8.98385e-05  0.7006706 4.941646e-01
# lpdrbcap      2.857804e-01 1.46636e-01  1.9476033 5.397770e-02
# lpop          6.296535e-01 1.91642e-01  3.2874440 1.840693e-03
# larea         4.312003e-01 1.54309e-01  2.7936361 5.439308e-03
# urban         1.476644e+00 6.01322e-01  2.4555987 1.423182e-02
# java          1.548029e-01 4.66525e-01  0.3320162 7.457742e-01
# postsoeharto  1.209776e+00 2.67681e-01  4.5206206 8.391501e-06

## Put this into csv
write.csv(admb_combined, file="admb_combined.csv")
read.csv("admb_combined.csv")

# X      Estimate  Std..Error    z.value     Pr...z..
# 1   (Intercept) -1.818961e+01 2.53001e+00 -7.1912777 3.639634e-12
# 2     revvotmar -1.371652e-02 4.49009e-03 -3.0548678 2.368918e-03
# 3       afterel  5.842508e-01 1.95227e-01  2.9923988 2.881747e-03
# 4   rel5050gr21  1.031547e+00 5.65196e-01  1.8325681 1.228729e-01
# 5  unkomptemp_1  7.905188e-02 2.60521e-02  3.0697138 3.105519e-03
# 6   securityadj  6.630282e-05 8.98385e-05  0.7006706 4.941646e-01
# 7      lpdrbcap  2.857804e-01 1.46636e-01  1.9476033 5.397770e-02
# 8          lpop  6.296535e-01 1.91642e-01  3.2874440 1.840693e-03
# 9         larea  4.312003e-01 1.54309e-01  2.7936361 5.439308e-03
# 10        urban  1.476644e+00 6.01322e-01  2.4555987 1.423182e-02
# 11         java  1.548029e-01 4.66525e-01  0.3320162 7.457742e-01
# 12 postsoeharto  1.209776e+00 2.67681e-01  4.5206206 8.391501e-06

## For Column 2 Table 1
admb_model2 <- vector(mode = "list", length = length(a.out$imputations))
for (i in seq_along(a.out$imputations)){
  print(i)
  admb_model2[[i]] <- glmmadmb(unkomptemp ~ golkarvs + afterel + rel5050gr21 + unkomptemp_1 + securityadj+
                                 lpdrbcap + lpop + larea + urban + java + postsoeharto +
                                 (1|dist_code),
                               family = "nbinom",
                               zeroInflation = FALSE,
                               data = a.out$imputations[[i]])
}
save(admb_model2, file = "admb_model2.Rdata")
## Get the coefficient and summaries
admb_model2_summaries <- lapply(admb_model2, summary)
admb_model2_coefs <- lapply(admb_model2_summaries, coef)
# get average coefficients for the 10 models
admb2_combined <- Reduce("+", admb_model2_coefs) / length(admb_model2_coefs)
## print results
print(admb2_combined)

# Estimate  Std. Error    z value     Pr(>|z|)
# (Intercept)  -1.970877e+01 2.55130e+00 -7.7268351 5.090189e-14
# golkarvs      2.870041e+00 4.96163e-01  5.7841770 8.042580e-09
# afterel       6.919511e-01 1.92739e-01  3.5898493 3.500197e-04
# rel5050gr21   9.227554e-01 5.55320e-01  1.6671281 1.371905e-01
# unkomptemp_1  8.580347e-02 2.23676e-02  3.8639533 1.748914e-04
# securityadj   7.906105e-05 8.24392e-05  0.9486000 3.755725e-01
# lpdrbcap      3.892091e-01 1.46746e-01  2.6512133 8.584952e-03
# lpop          6.556033e-01 1.91147e-01  3.4314284 1.016252e-03
# larea         3.860485e-01 1.54006e-01  2.5060327 1.270714e-02
# urban         1.382202e+00 6.00441e-01  2.3018459 2.156045e-02
# java          3.970422e-01 4.68545e-01  0.8475004 4.056134e-01
# postsoeharto  1.631664e+00 2.70238e-01  6.0385516 2.209315e-09

## put this into csv 
write.csv(admb2_combined, file="admb2_combined.csv")
read.csv(admb2_combined, file="admb2_combined.csv")

# X      Estimate  Std..Error    z.value     Pr...z..
# 1   (Intercept) -1.970877e+01 2.55130e+00 -7.7268351 5.090189e-14
# 2      golkarvs  2.870041e+00 4.96163e-01  5.7841770 8.042580e-09
# 3       afterel  6.919511e-01 1.92739e-01  3.5898493 3.500197e-04
# 4   rel5050gr21  9.227554e-01 5.55320e-01  1.6671281 1.371905e-01
# 5  unkomptemp_1  8.580347e-02 2.23676e-02  3.8639533 1.748914e-04
# 6   securityadj  7.906105e-05 8.24392e-05  0.9486000 3.755725e-01
# 7      lpdrbcap  3.892091e-01 1.46746e-01  2.6512133 8.584952e-03
# 8          lpop  6.556033e-01 1.91147e-01  3.4314284 1.016252e-03
# 9         larea  3.860485e-01 1.54006e-01  2.5060327 1.270714e-02
# 10        urban  1.382202e+00 6.00441e-01  2.3018459 2.156045e-02
# 11         java  3.970422e-01 4.68545e-01  0.8475004 4.056134e-01
# 12 postsoeharto  1.631664e+00 2.70238e-01  6.0385516 2.209315e-09

## For Column 3 Table 2 
admb_model3 <- vector(mode = "list", length = length(a.out$imputations))
for (i in seq_along(a.out$imputations)){
  print(i)
  admb_model3[[i]] <- glmmadmb(unkomptemp ~ gol8797_percent + afterel + rel5050gr21 + unkomptemp_1 + securityadj+
                                 lpdrbcap + lpop + larea + urban + java + postsoeharto +
                                 (1|dist_code),
                               family = "nbinom",
                               zeroInflation = FALSE,
                               data = a.out$imputations[[i]])
}
save(admb_model3, file = "admb_model3.Rdata")
## Get the coefficient and summaries
admb_model3_summaries <- lapply(admb_model3, summary)
admb_model3_coefs <- lapply(admb_model3_summaries, coef)
# get average coefficients for the 10 models
admb3_combined <- Reduce("+", admb_model3_coefs) / length(admb_model3_coefs)
## print results
print(admb3_combined)

# Estimate  Std. Error    z value     Pr(>|z|)
# (Intercept)     -1.760504e+01 2.53568e+00 -6.9444503 2.819217e-11
# gol8797_percent  8.164206e-03 9.13668e-03  0.8964150 4.708839e-01
# afterel          4.849278e-01 1.93853e-01  2.5013334 1.262528e-02
# rel5050gr21      8.578777e-01 5.73738e-01  1.5025393 1.359291e-01
# unkomptemp_1     7.212813e-02 2.67473e-02  2.7322231 8.905038e-03
# securityadj      6.531604e-05 9.01201e-05  0.6925416 5.127534e-01
# lpdrbcap         2.194271e-01 1.46836e-01  1.4912551 1.439601e-01
# lpop             6.595513e-01 1.92877e-01  3.4223928 1.479308e-03
# larea            4.302643e-01 1.55532e-01  2.7654743 5.927280e-03
# urban            1.421194e+00 6.04761e-01  2.3499415 1.900640e-02
# java            -1.614486e-01 4.67902e-01 -0.3461365 7.142933e-01
# postsoeharto     9.566768e-01 2.59248e-01  3.6913674 2.737589e-04

## put this into csv
write.csv(admb3_combined, file="admb3_combined.csv")
read.csv("admb3_combined.csv")
# 
# X      Estimate  Std..Error    z.value     Pr...z..
# 1      (Intercept) -1.760504e+01 2.53568e+00 -6.9444503 2.819217e-11
# 2  gol8797_percent  8.164206e-03 9.13668e-03  0.8964150 4.708839e-01
# 3          afterel  4.849278e-01 1.93853e-01  2.5013334 1.262528e-02
# 4      rel5050gr21  8.578777e-01 5.73738e-01  1.5025393 1.359291e-01
# 5     unkomptemp_1  7.212813e-02 2.67473e-02  2.7322231 8.905038e-03
# 6      securityadj  6.531604e-05 9.01201e-05  0.6925416 5.127534e-01
# 7         lpdrbcap  2.194271e-01 1.46836e-01  1.4912551 1.439601e-01
# 8             lpop  6.595513e-01 1.92877e-01  3.4223928 1.479308e-03
# 9            larea  4.302643e-01 1.55532e-01  2.7654743 5.927280e-03
# 10           urban  1.421194e+00 6.04761e-01  2.3499415 1.900640e-02
# 11            java -1.614486e-01 4.67902e-01 -0.3461365 7.142933e-01
# 12    postsoeharto  9.566768e-01 2.59248e-01  3.6913674 2.737589e-04

## Plotting coefficients of different models above. 
library(coefplot2)
vn <- c("revvotmar", "golkarvs", "afterel", "unkomptemp_1", "lpdrbcap", "lpop", "urban", "java", "unemp", "postsoeharto","securityadj", "rel5050gr21", "gol8797_percent")
coefplot2( list(Model1 = admb_models[[1]],
           Model2 = admb_model2[[1]],
          Model3 = admb_model3[[1]]),
           varnames = vn,
           legend = TRUE)

###-----------------------
  # create a dataset with predictors set at desired levels
  predDat <- with(a.out$imputations[[1]],
                  expand.grid(golkarvs = c(0.06, 0.9),
                              afterel = "1",
                              rel5050gr21 = mean(rel5050gr21, na.rm = TRUE),
                              securityadj=mean(securityadj,na.rm=TRUE),
                              unkomptemp_1 = mean(unkomptemp_1, na.rm = TRUE),
                              lpdrbcap = mean(lpdrbcap, na.rm = TRUE),
                              lpop = mean(lpop, na.rm = TRUE),
                              larea=mean(larea,na.rm=TRUE),
                              urban = "0",
                              java = "0",
                              postsoeharto="1")
  )
predDat$golkarvs <- factor(predDat$golkarvs)

# CODE DIRECTLY BELOW STILL NEEDS SOME WORK
# design matrix (fixed effects)
mm <- model.matrix(delete.response(terms(admb_models[[1]])), predDat)
# linear predictor (for GLMMs, back-transform this with the
#  inverse link function (e.g. plogis() for binomial, beta;
#  exp() for Poisson, negative binomial
newdat$distance <- exp(mm %*% fixef(admb_models[[1]]))
predvar <- diag(mm %*% vcov(admb_models[[1]]) %*% t(mm))
newdat$SE <- sqrt(predvar) 
newdat$SE2 <- sqrt(predvar + admb_models[[1]]$alpha^2)

# -----------------------
# predict function for glmm.admb - not compatible with other packages
# model = fitted glmm.admb model object
# newdata = new dataset to use in fitting (or test with old data)
# islog = was there an implicit or explicit log-link function used by glmm.admb? (optional, default = TRUE)

predict_admb <- function(model, newdata, islog = TRUE) {
  # Construct model matrix, nobs x np
  MM <- model.matrix(model$fixed, data=newdata, contrasts.arg = NULL)
  beta <- as.vector(model$b)
  phat <- MM %*% beta
  if (islog==TRUE) phat <- exp(phat)
  return (phat)
}

#   1. Predict using old data for comparison
#   2. Compare with fitted values returned from GLMM.admb and original data
#   3. Note: implicit log-link function

phat <- as.vector(model$fitted);
fvec <- predict_admb(model = admb_models[[1]], newdata = predDat, islog = FALSE)
pobs <- as.vector(newdata$N_Species)

#   1. Compare against observed and fitted results

library("ggplot2")
qplot(x=pobs, y=phat, geom="point") + xlab("Observed") + ylab("Predicted (fitted)")
qplot(x=pobs, y=fvec) + xlab("Observed") + ylab("Fixed-only predictions")
qplot(x=phat, y=fvec) + xlab("Predicted (fitted)") + ylab("Fixed-only predictions")







# -----------------
# use mitools to combine the estimates
# https://stat.ethz.ch/pipermail/r-sig-mixed-models/2011q1/015765.html
# extract betas and standard errors
betas_admb <- MIextract(admb_models, fun = fixef) # not coef() for glmer
vars_admb <- MIextract(admb_models, fun = vcov)
vars_admb <- lapply(vars_admb, as.matrix)
# combine the results
admb_mitools <- summary(MIcombine(betas_admb, vars_admb))
## The results below are consistent with the above ones
#Multiple imputation results:
# MIcombine.default(betas_admb, vars_admb)
#results           se        (lower        upper) missInfo
#(Intercept)  -1.837876e+01 2.5895461116 -2.345513e+01 -13.302391031      4 %
#revvotmar    -1.414779e-02 0.0045186847 -2.300496e-02  -0.005290615      2 %
# afterel       5.890260e-01 0.1965258879  2.038317e-01   0.974220310      1 %
# rel5050gr21   1.259429e+00 0.8040776317 -3.738704e-01   2.892728469     54 %
# unkomptemp_1  8.376720e-02 0.0282409544  2.840747e-02   0.139126932      3 %
# securityadj   6.221341e-05 0.0000901754 -1.148352e-04   0.000239262     12 %
# lpdrbcap      2.751832e-01 0.1524887994 -2.382941e-02   0.574195745      6 %
# lpop          6.500910e-01 0.1984136965  2.610584e-01   1.039123550      5 %
# larea         4.188401e-01 0.1552153234  1.146035e-01   0.723076775      2 %
# urban         1.453154e+00 0.6016399266  2.739399e-01   2.632367119      1 %
# java          1.446105e-01 0.4730088156 -7.825791e-01   1.071800149      3 %
# postsoeharto  1.239196e+00 0.2760928732  6.980047e-01   1.780387290      3 %

# -----------------------------------------------------------------------
# plots

install.packages("coefplot2", repos = "http://www.math.mcmaster.ca/bolker/R", type = "source")
library(coefplot2)
op <- par(mfrow = c(1, 2))
coefplot2(admb_mitools[-0.7, "results"], admb_mitools[-0.7, "se"],
          varnames = rownames(admb_mitools)[-0.7], col.pts = "darkred",
          main = "Negative binomial model")
coefplot2(glmer_mitools[-1, "results"], glmer_mitools[-1, "se"],
          varnames = rownames(glmer_mitools)[-1], col.pts = "darkblue",
          main = "Poisson model")
par(op)

###------


## Combine all imputed datasets into one
## load Zelig
library(Zelig)
dataimpute<-mi(data1, data2, data3, data4, data5, data6, data7, data8, data9,data10)
## save bound imputed datasets into one R data.
save(dataimpute, file="mydataimpute.Rdta")  
## save the data into CSV format, to be opened later in Stata
write.csv(dataimpute, file="imputed_data.csv")

## open imputed_data.csv, save as "imputed_data_Amelia_071514.csv## 
## I checked the imputed_data_Amealia_071514.csv file, and it looks like it just appended all the data files into one giant one. 


## Use Zelig to calculate predicted values and generate graphs
## load necessary packages

update.packages()
library(Zelig)
library(ZeligChoice)
library(foreign)
data<-read.dta("/Users/risatoha/Documents/Job Market/Writing Samples/Amelia imputed results.dta")
dim(data)
data[1:10,]
names(data)
summary(data)

##Regress existing model from Table 6 Model 7
z.out <- zelig(unkomptemp~ revvotmar + revelprox+ elf2 + unkomptemp_1 + lpdrb + lpop + urban + java, model = "negbinom" , data = subset(data, postsoeharto==1 & sepstrict==0))  # Run likelihood model
print(z.out)
summary(z.out)
stop()

## Expected and Predicted Values of key independent variables
x.out <- setx(z.out, golkarvs = 0.10)  # Set chosen values, Xc. 
print(x.out)                          # Variables not specified will be set to their mean
s.out <- sim(z.out, x = x.out)  # Simulate Predicted and Expected Values  summary(s.out)
plot(s.out)

## Plotting Expected and Predicted riots, given set values of Xs  
par(mfrow=c(2,1))
names(s.out)                                        # List the names of all the objects in this list
plot(density(s.out$qi$ev),main="Expected Values") 
plot(density(s.out$qi$pr),main="Predicted Values")
stop()

## Plotting Expected Riot for High and Low Electoral Competitiveness

## the following group of code doesn't work

par(mfrow=c(3,2))
x.out <- setx(z.out, revvotmar = -10)
s.out.low <- sim(z.out, x = x.out)	
plot(density(s.out.low$qi$ev), ylim=c(0,40),xlim=c(0,0.1),xlab= "Incidence of Riots", lwd=3, lty= 3, main="Expected Riots at Low & High EC")
par(new=TRUE)
x.out <- setx(z.out, revvotmar = -90)
s.out.high <- sim(z.out, x = x.out)
plot(density(s.out.high$qi$ev),ylim=c(0,40),xlim=c(0,0.1),xlab= "Incidence of Riots",col="red", type="l", lwd= 3, lty=1, main="Expected Riots at Low & High EC")
text(x=0.05, y=5, expression(paste("..... = Low EC")))
text(x=0.05, y=10, expression(paste("----- = High EC")))

## graphing imputed results from one of the imputed datasets
hist(a.out$imputations[[4]]$elf2, col="grey", border= "white")
hist(a.out$imputations[[5]]$revvotmar, col= "grey", border= "white")